CREATE EXTENSION tablefunc;
DROP TABLE IF EXISTS cand_paper.real_data;
DROP TABLE IF EXISTS cand_paper.base_tabulation;
UPDATE cand_paper.cand_data SET latitude=0,longitude=0 WHERE latitude IS NULL;

CREATE TABLE cand_paper.real_data AS
SELECT * FROM cand_paper.ged191 WHERE year=2018 and (region='Africa' or country_id='651');

SELECT * FROM cand_paper.real_data GROUP BY country;

CREATE TABLE cand_paper.base_tabulation AS
with base_tabulation AS (
  SELECT COALESCE(cand.id, reald.id)            as id,
         COALESCE(cand.code_status, 'NONE')     as orig_status,
         COALESCE(reald.code_status, 'DELETED') as final_status
  FROM cand_paper.cand_data cand
         FULL OUTER JOIN
       cand_paper.real_data reald
       USING (id)
) SELECT * FROM base_tabulation;

with type_tabulation AS (
SELECT cand.id as id, cand.code_status as orig_code_status,
       st_setsrid(st_point(cand.longitude,cand.latitude),4326) as cand_geom,
       st_setsrid(st_point(real.longitude,real.latitude),4326) as real_geom,
       cand.dyad_new_id as cand_dyad_id,
       real.dyad_new_id as real_dyad_id,
       cand.high as cand_high,
       real.high as real_high,
       cand.best as cand_best,
       real.best as real_best,
       real.low as real_low,
       cand.low as cand_low,
       cand.date_start::date as cand_dates,
       real.date_start::date as real_dates,
       cand.date_end::date as cand_datee,
       real.date_end::date as real_datee
       FROM cand_paper.cand_data cand, cand_paper.real_data real WHERE cand.id=real.id),
type_tabulation_2 AS (
  SELECT id,
         orig_code_status,
         to_dummy((cand_dyad_id - real_dyad_id))                                                                 as dyad_diff,
         to_dummy(((cand_high - real_high) ^ 2 + (cand_low - real_low) ^ 2 +
                   (cand_best - real_best) ^ 2)::int)                                                           as deaths_diff,
         to_dummy(
             ((cand_dates - real_dates) ^ 2 + (cand_datee - real_datee) ^ 2)::int)                              as date_diff,
         case
           when (st_distance(cand_geom::geography, real_geom::geography) < 1000) THEN 0
           ELSE 1 END                                                                                           as geom_diff
  FROM type_tabulation
),
precross AS (
  SELECT * FROM crosstab('SELECT orig_status, final_status, count(*)::int as cnt FROM cand_paper.base_tabulation GROUP BY orig_status, final_status ORDER BY orig_status, final_status')
  AS ct(rk text, kept INT, deleted INT)
),
precross2 AS (SELECT
  rk AS code_status,
  CASE WHEN rk='NONE' THEN 0 ELSE kept END as retained,
  CASE WHEN rk='NONE' THEN 0 ELSE deleted END as deleted,
  CASE WHEN rk='NONE' THEN kept ELSE 0 END as added
  FROM precross ORDER BY retained DESC),
postcross AS (SELECT orig_code_status as code_status,
       count(*) as total_events,
       sum(dyad_diff) as dyad_diff,
       sum(deaths_diff) as deaths_diff,
       sum(date_diff) as date_diff,
       sum(geom_diff) as geom_diff
       FROM type_tabulation_2 GROUP BY orig_code_status)
SELECT precross2.code_status, precross2.retained, precross2.deleted, precross2.added,
       postcross.dyad_diff,deaths_diff,date_diff,geom_diff
FROM postcross RIGHT JOIN precross2 USING (code_status) ORDER BY retained DESC


